import pandas as pd
import datetime
import numpy as np
from scipy.stats import spearmanr

def HK_skill_score(A, B, C, D):
    """Return the HK (Hanssen-Kuipers) skill score of a 2x2 contingency table.

    The score is ``(A*D - C*B) / ((A + B) * (C + D))``.  In this script the
    cells are filled as:

    * A -- observation > 0 and forecast > 0
    * B -- observation > 0 and forecast == 0
    * C -- observation == 0 and forecast > 0
    * D -- observation == 0 and forecast == 0

    With plain Python numbers a ZeroDivisionError is raised when ``A + B``
    or ``C + D`` is zero.
    """
    numerator = A * D - C * B
    denominator = (A + B) * (C + D)
    return numerator / denominator

def Accuracy(A, B, C, D):
    """Return the fraction ``(A + D) / (A + B + C + D)`` of a 2x2 table.

    A and D are the two cells in which forecast and observation agree
    (both > 0, respectively both == 0); B and C are the disagreeing cells.
    With plain Python numbers a ZeroDivisionError is raised for an empty
    table.
    """
    total = A + B + C + D
    return (A + D) / total

# --- Raw inputs -------------------------------------------------------------
# forecasts : Meteoblue forecast export (has a 'regionid' per row)
# users     : user table with 'locationid' and 'regionid' columns
# user_input: app entries, later filtered on 'datasourcecode'
forecasts = pd.read_csv("C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/data-1662467091031 - meteoblue_forecasts.csv")
users = pd.read_csv("C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/Users.csv")
user_input = pd.read_csv("C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/data-1662467666108 - observations.csv")

# --- Split per community (regionid: Gbulung = 4, Nakpanzoo = 2, Yapalsi = 3) --
users_gbulung = users[users['regionid'] == 4]
users_nakpanzoo = users[users['regionid'] == 2]
users_yapalsi = users[users['regionid'] == 3]
# users_nabogu = users[users['regionid'] == 8]

forecasts_gbulung = forecasts[forecasts['regionid'] == 4]
forecasts_nakpanzoo = forecasts[forecasts['regionid'] == 2]
forecasts_yapalsi = forecasts[forecasts['regionid'] == 3]
# forecasts_nabogu = forecasts[forecasts['regionid'] == 8]

# Local forecasts entered by the farmers through the app.
# .copy() makes this an independent frame, so the column assignment below
# neither triggers SettingWithCopyWarning nor touches `user_input`.
if_forecast = user_input.loc[user_input['datasourcecode'] == 'user_local_forecast'].copy()

# Convert the 'datetime' COLUMN.  The former `if_forecast.loc['datetime'] = ...`
# addressed a row label: it appended an all-NaN row called 'datetime' and left
# the column itself as text.
if_forecast['datetime'] = pd.to_datetime(if_forecast['datetime'])

# Only forecasts made before the cut-off are analysed.  The cut-off takes over
# the timezone of the parsed column (None for naive timestamps) so that the
# comparison is valid in both cases.
forecast_cutoff = pd.Timestamp('2022-10-15 00:00:00', tz=if_forecast['datetime'].dt.tz)
before_cutoff = if_forecast['datetime'] < forecast_cutoff

# Per-community selections: a forecast belongs to a community when its
# 'locationid' occurs in that community's user table.
# (Nabogu is disabled: it would use users_nabogu in the same way.)
if_forecast_yapalsi = if_forecast.loc[
    before_cutoff & if_forecast['locationid'].isin(users_yapalsi['locationid'])
].copy()
if_forecast_nakpanzoo = if_forecast.loc[
    before_cutoff & if_forecast['locationid'].isin(users_nakpanzoo['locationid'])
].copy()
if_forecast_gbullung = if_forecast.loc[
    before_cutoff & if_forecast['locationid'].isin(users_gbulung['locationid'])
].copy()

# Averaged farmer observations (file names: 'average_obs_NNY' and
# 'average_obs_gbullung'); they serve as the observed values ('value_y')
# in the merges further down.
#
# The columns are replaced with plain `df[col] = ...` on purpose: in
# pandas >= 2.0 `df.loc[:, col] = ...` writes into the existing object column
# and keeps dtype object, after which merging on a datetime64 key fails.
obs_farmers = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/average_obs_NNY')
obs_farmers['datetime'] = pd.to_datetime(obs_farmers['datetime'])

obs_farmers_gbullung = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/average_obs_gbullung')
obs_farmers_gbullung['datetime'] = pd.to_datetime(obs_farmers_gbullung['datetime'])

def _same_day_precipitation(region_forecasts):
    """Return ``(same_day, same_day_prec)`` for one region's forecast table.

    ``same_day`` keeps the rows whose 'datetime' equals 'forecastdatetime'.
    ``same_day_prec`` keeps, of those, the rows with variablecode 'prec' and
    has its 'datetime' column converted to datetime64.

    The conversion is done on a copy with plain column assignment:
    ``df.loc[:, 'datetime'] = ...`` on a filtered slice raises
    SettingWithCopyWarning and, in pandas >= 2.0, leaves the column as
    object dtype, which breaks the later merges on 'datetime'.
    """
    same_day = region_forecasts.loc[
        region_forecasts['datetime'] == region_forecasts['forecastdatetime']
    ]
    same_day_prec = same_day.loc[same_day['variablecode'] == 'prec'].copy()
    same_day_prec['datetime'] = pd.to_datetime(same_day_prec['datetime'])
    return same_day, same_day_prec


forecasts_gbulung_sameday, forecasts_gbulung_sameday_prec = _same_day_precipitation(forecasts_gbulung)
forecasts_nakpanzoo_sameday, forecasts_nakpanzoo_sameday_prec = _same_day_precipitation(forecasts_nakpanzoo)
forecasts_yapalsi_sameday, forecasts_yapalsi_sameday_prec = _same_day_precipitation(forecasts_yapalsi)

# Step 1: pair every farmer forecast with the observation of the same datetime.
# Both tables carry a 'value' column, so the merge renames them to
# 'value_x' (farmer forecast) and 'value_y' (observation).
test_yapalsi = pd.merge(if_forecast_yapalsi, obs_farmers, on='datetime')
test_nakpanzoo = pd.merge(if_forecast_nakpanzoo, obs_farmers, on='datetime')
test_gbullung = pd.merge(if_forecast_gbullung, obs_farmers_gbullung, on='datetime')

# Step 2: add the same-day precipitation forecast of the community.
# Resulting columns: value_x = IF, value_y = obs, value = SF
test_yapalsi_SF = pd.merge(test_yapalsi, forecasts_yapalsi_sameday_prec, on='datetime')
test_nakpanzoo_SF = pd.merge(test_nakpanzoo, forecasts_nakpanzoo_sameday_prec, on='datetime')
test_gbullung_SF = pd.merge(test_gbullung, forecasts_gbulung_sameday_prec, on='datetime')

# All farmer forecasts of Yapalsi and Nakpanzoo in one chronologically sorted
# table, which is also exported to CSV.
# (With Nabogu enabled the list would start with if_forecast_nabogu.)
combined_NNY_if = pd.concat([if_forecast_yapalsi, if_forecast_nakpanzoo])
combined_NNY_if['datetime'] = pd.to_datetime(combined_NNY_if['datetime'])
combined_NNY_if = (
    combined_NNY_if
    .sort_values(by='datetime')
    .reset_index()            # old row labels become the column 'index' ...
    .drop(columns='index')    # ... which is discarded again
)
combined_NNY_if.to_csv('C:/Users/joepb/PycharmProjects/data_storage/farmer_forecasts.csv', index=False)

# if_forecast_nakpanzoo_single_date = if_forecast_nakpanzoo.groupby('datetime')['value'].agg(pd.Series.mode)

# Reduce every combined farmer forecast to a rain / no-rain flag:
#   value > 0  -> 1
#   value == 0 -> 0
# Rows whose value satisfies neither test (e.g. NaN) are not copied.
# The frame starts with one empty row (label 0) and is enlarged one cell at a
# time through .loc, reusing the row labels of combined_NNY_if.
# NOTE(review): the later `type(...) == np.float64` test presumably relies on
# the float 'value' column that this enlargement produces -- confirm before
# vectorising this loop.
combined_NNY_if_0_1 = pd.DataFrame(index=np.arange(1))
for i in combined_NNY_if.index:
    if combined_NNY_if.loc[i,'value'] > 0:
        combined_NNY_if_0_1.loc[i, 'datetime'] = combined_NNY_if.loc[i,'datetime']
        combined_NNY_if_0_1.loc[i, 'value'] = 1
    if combined_NNY_if.loc[i, 'value'] == 0:
        combined_NNY_if_0_1.loc[i, 'datetime'] = combined_NNY_if.loc[i,'datetime']
        combined_NNY_if_0_1.loc[i, 'value'] = 0


# Per datetime: the mode of the 0/1 flags (majority vote of the farmers).
combined_NNY_if_avg_0_1 = combined_NNY_if_0_1.groupby('datetime')['value'].agg(pd.Series.mode)
# Per datetime: the mean of the strictly positive forecasts only.
combined_NNY_if_avg = combined_NNY_if.loc[combined_NNY_if['value']>0]
combined_NNY_if_avg = combined_NNY_if_avg.groupby('datetime')['value'].agg(pd.Series.mean)

# Build one combined farmer forecast per datetime.
# The frame gets one COLUMN per datetime (single row 0) and is transposed
# afterwards, so that the datetimes end up in the index.
combined_NNY_if_final = pd.DataFrame(index=np.arange(1))
for i in combined_NNY_if_avg_0_1.index:
    # A single np.float64 means the 0/1 flags had one unique mode.
    if type(combined_NNY_if_avg_0_1[i]) == np.float64:
        if combined_NNY_if_avg_0_1[i] == 1.0:
            # Majority says rain: use the mean of the positive forecasts.
            combined_NNY_if_final[i] = float(combined_NNY_if_avg[i])
        elif combined_NNY_if_avg_0_1[i] == 0.0:
            # Majority says no rain.
            combined_NNY_if_final[i] = 0
    else:
        # Anything that is not a single np.float64 (presumably an array of
        # tied modes -- TODO confirm) is treated as no rain.
        combined_NNY_if_final[i] = 0


        # if combined_NNY_if_avg[i][0] == 0:
        #     combined_NNY_if_final[i] = 0
        # if combined_NNY_if_avg[i][0] != 0:
        #     combined_NNY_if_final[i] = np.mean(combined_NNY_if_avg[i])
# Datetimes to the index, the single data column is named 'value'.
combined_NNY_if_final = np.transpose(combined_NNY_if_final)
combined_NNY_if_final = combined_NNY_if_final.rename(columns={0:'value'})
# Attach the observations (index values matched against obs_farmers['datetime'])
# and then the same-day Meteoblue precipitation forecast. Following the
# convention used for the per-community tables: value_x = farmer forecast,
# value_y = observation, value = scientific forecast.
# NOTE(review): the combined table is paired with the Nakpanzoo forecast only.
combined_NNY_if_final = combined_NNY_if_final.merge(obs_farmers,left_on = combined_NNY_if_final.index , right_on = 'datetime')
combined_NNY_if_final = combined_NNY_if_final.merge(forecasts_nakpanzoo_sameday_prec,left_on = 'datetime',right_on = 'datetime')

# Index by datetime; the 'datetime' column itself is kept as well.
combined_NNY_if_final.set_index(combined_NNY_if_final['datetime'],inplace=True)

#
# users = pd.read_csv("C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/Users.csv")
# obs_farmers = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/average_obs_NNY')
# obs_farmers['datetime'] = pd.to_datetime(obs_farmers['datetime'])
# obs_farmers_gbullung = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/average_obs_gbullung')
# obs_farmers_gbullung['datetime'] = pd.to_datetime(obs_farmers_gbullung['datetime'])

# Here the farmers are assessed: the forecasts of every individual farmer are
# paired with the averaged observations of the same datetime and scored.
#
# Fix: 'hk_score' and 'Accuracy' are stored below and 'Accuracy' is used as
# the selection threshold afterwards, but their computation had been
# commented out, so the loop stopped with a NameError.  It is active again.
performance_columns = ['rho', 'p', 'hk_score', 'Accuracy', 'locationid', 'regionid', 'nmb_forecast']
performance_records = []
for usr, region_user in zip(users['locationid'], users['regionid']):
    if_forecast_usr = if_forecast.loc[
        if_forecast['locationid'] == usr, ['datetime', 'value', 'locationid']
    ].copy()
    if_forecast_usr['datetime'] = pd.to_datetime(if_forecast_usr['datetime'])
    # How many forecasts did this farmer make?
    nmbr_if = len(if_forecast_usr)
    # value_x = farmer forecast, value_y = observation
    df_r_stat = if_forecast_usr.merge(obs_farmers, on='datetime')

    # Contingency table needed for the HK score and the accuracy.
    observed_rain = df_r_stat['value_y'] > 0
    observed_dry = df_r_stat['value_y'] == 0
    forecast_rain = df_r_stat['value_x'] > 0
    forecast_dry = df_r_stat['value_x'] == 0
    # int(...) keeps plain Python ints, so an undefined score raises
    # ZeroDivisionError instead of silently becoming nan/inf.
    A = int((observed_rain & forecast_rain).sum())
    B = int((observed_rain & forecast_dry).sum())
    C = int((observed_dry & forecast_rain).sum())
    D = int((observed_dry & forecast_dry).sum())

    try:
        HK_score = HK_skill_score(A, B, C, D)
    except ZeroDivisionError:
        # No rainy or no dry observation among this farmer's forecasts
        # (this includes farmers without any matched forecast): skip.
        continue
    Acc = Accuracy(A, B, C, D)

    # Spearman's rho between forecast and observed values.
    rho, p = spearmanr(df_r_stat['value_x'], df_r_stat['value_y'])

    performance_records.append({
        'rho': rho,
        'p': p,
        'hk_score': HK_score,
        'Accuracy': Acc,
        'locationid': usr,
        'regionid': region_user,
        'nmb_forecast': nmbr_if,
    })

# One row per scored farmer; rows with an undefined statistic are dropped.
performance_single_farmer = pd.DataFrame(performance_records, columns=performance_columns)
performance_single_farmer = performance_single_farmer.dropna()

# Determine the threshold of when a farmer is considered 'good'.
performance_single_farmer_high = performance_single_farmer.loc[performance_single_farmer['Accuracy'] > 0.6]
# performance_single_farmer_high = performance_single_farmer_high.loc[performance_single_farmer_high['p']<0.2]

# Select all the forecasts made by the good farmers and take, per datetime,
# the mode(s) of their forecast values.
forecasts_of_good_farmers = if_forecast.loc[if_forecast['locationid'].isin(performance_single_farmer_high['locationid'])]
forecasts_of_good_farmers_comb = forecasts_of_good_farmers.groupby('datetime')['value'].agg(pd.Series.mode)

# If there are multiple forecasts by good farmers on one day, combine them:
#   one mode          -> that value
#   more than 2 modes -> their mean
#   exactly 2 modes   -> 0 when the first of them is 0, otherwise their mean
# Fix: a single mode arrives as a NumPy scalar, which never matched the former
# `type(...) == int` test and then failed on len(); np.ndim() recognises any
# scalar.
good_farmer_values = {}
for moment, modes in forecasts_of_good_farmers_comb.items():
    if np.ndim(modes) == 0:
        good_farmer_values[moment] = modes
    elif len(modes) > 2:
        good_farmer_values[moment] = np.mean(modes)
    elif len(modes) == 2:
        good_farmer_values[moment] = 0 if modes[0] == 0 else np.mean(modes)
# Datetimes in the index, a single column 'value_x' (the farmer-forecast
# column of combined_NNY_if_final that is overwritten below).
forecasts_of_good_farmers_final = pd.DataFrame(
    {'value_x': pd.Series(good_farmer_values, dtype=float)}
)

# Both indexes must be real datetimes for update() to align them.
combined_NNY_if_final.index = pd.to_datetime(combined_NNY_if_final.index)
forecasts_of_good_farmers_final.index = pd.to_datetime(forecasts_of_good_farmers_final.index)

# Here the dataframe is made with the forecasts of the good farmers
# overwriting the other forecasts.
best_farmers = combined_NNY_if_final.copy()
best_farmers.update(forecasts_of_good_farmers_final)

# combined_NNY_if_final_combined.to_csv('C:/Users/joepb/PycharmProjects/data_storage/farmer_forecast_combined.csv')
# best_farmers = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/farmer_forecast_combined.csv


#PART 3 -> using this data to make the figures of the thesis
# Both tables carry 'datetime' as index and as column; drop the column and
# turn the index back into a regular 'datetime' column.
del best_farmers['datetime']
best_farmers.reset_index(inplace = True)
del combined_NNY_if_final['datetime']
combined_NNY_if_final.reset_index(inplace = True)


# Meteoblue rain amount ('value_x' in the file) and rain probability
# ('value_y' in the file).
MB_prob_prec = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/MB_prob_prec_total.csv')
MB_prob_prec['datetime'] = pd.to_datetime(MB_prob_prec['datetime'])
MB_prob_prec = MB_prob_prec.rename(columns={'value_x': 'MB_rain_amount', 'value_y': 'MB_rain_prob'})
MB_prob_prec = MB_prob_prec.drop(columns=['cat_x', 'cat_y'])

# Filtered forecast:
#   probability known   -> keep the amount when probability >= 60, else 0
#   probability missing -> keep the amount unless it is below 1, then 0
# Fix: a probability of exactly 60 used to match neither `< 60` nor `> 60`,
# left the filtered value NaN and so dropped that day from every score.
# NOTE(review): 60 is now treated as passing the filter -- confirm.
rain_prob = MB_prob_prec['MB_rain_prob']
rain_amount = MB_prob_prec['MB_rain_amount']
MB_prob_prec['MB_rain_amount_filt'] = np.where(
    rain_prob.isna(),
    rain_amount.mask(rain_amount < 1, 0),
    rain_amount.mask(rain_prob < 60, 0),
)
MB_prob_prec = MB_prob_prec.drop(columns=['MB_rain_prob', 'MB_rain_amount'])

# Pair the filtered forecast with the best-farmer table and present it in the
# common layout: value = scientific forecast, value_y = observation,
# value_x = farmer forecast.
MB_prob_prec = MB_prob_prec.merge(best_farmers, on='datetime')
MB_prob_prec = MB_prob_prec.loc[:, ['datetime', 'MB_rain_amount_filt', 'value_y', 'value_x']]
MB_prob_prec = MB_prob_prec.rename(columns={'MB_rain_amount_filt': 'value'})
# best_farmers = best_farmers.rename(columns={'index':'datetime','Farmer_forecast':'value'})
# best_farmers['datetime'] = pd.to_datetime(best_farmers['datetime'])
# best_farmers = best_farmers.merge(obs_farmers,left_on = 'datetime' , right_on = 'datetime')
# best_farmers = best_farmers.merge(forecasts_nakpanzoo_sameday_prec,left_on = 'datetime',right_on = 'datetime')
# best_farmers = best_farmers.loc[best_farmers['datetime'] < '2022-10-15 00:00:00']


def _count_outcomes(observed, predicted):
    """Tally the 2x2 contingency table (A, B, C, D) of paired values.

    A: observed > 0 and predicted > 0      B: observed > 0 and predicted == 0
    C: observed == 0 and predicted > 0     D: observed == 0 and predicted == 0
    Pairs matching none of these (e.g. NaN) are not counted.
    """
    A = B = C = D = 0
    for obs, pred in zip(observed, predicted):
        if obs > 0:
            if pred > 0:
                A += 1
            if pred == 0:
                B += 1
        if obs == 0:
            if pred > 0:
                C += 1
            if pred == 0:
                D += 1
    return A, B, C, D


# (label, table) pairs to score.  Every table provides 'datetime',
# 'value_x' (farmer forecast), 'value_y' (observation) and 'value'
# (scientific forecast).
communities = [
    ('Yapalsi', test_yapalsi_SF),
    ('Nakpanzoo', test_nakpanzoo_SF),
    # ('Gbulung', test_gbullung_SF),
    ('Combined', combined_NNY_if_final),
    ('Best farmers', best_farmers),
    ('Filtered forecast', MB_prob_prec),
]

# One column per label is added inside the loop.
comm_results = pd.DataFrame(index=['HK_score', 'Accuracy', 'HK_score_SF', 'Accuracy_SF'])

for name, comm in communities:
    # Farmer forecast against observation, one count per row.
    A, B, C, D = _count_outcomes(comm['value_y'], comm['value_x'])
    comm_results.loc['HK_score', name] = HK_skill_score(A, B, C, D)
    comm_results.loc['Accuracy', name] = Accuracy(A, B, C, D)

    # Scientific forecast against observation, one count per datetime
    # (mode of the values sharing that datetime).
    comm_grouped_y = comm.groupby('datetime')['value_y'].agg(pd.Series.mode)
    comm_grouped_value = comm.groupby('datetime')['value'].agg(pd.Series.mode)
    A, B, C, D = _count_outcomes(comm_grouped_y, comm_grouped_value)
    print(A+B+C+D)
    print(HK_skill_score(A,B,C,D))
    comm_results.loc['HK_score_SF', name] = HK_skill_score(A, B, C, D)
    comm_results.loc['Accuracy_SF', name] = Accuracy(A, B, C, D)
    # A = 0
    # B = 0
    # C = 0
    # D = 0
    # for i in comm.index:
    #     if comm.loc[i,'value_y'] > 0:
    #         if comm.loc[i,'value'] > 0:
    #             A += 1
    #         if comm.loc[i,'value'] == 0:
    #             B += 1
    #     if comm.loc[i,'value_y'] == 0:
    #         if comm.loc[i,'value'] > 0:
    #             C += 1
    #         if comm.loc[i,'value'] == 0:
    #             D += 1
    # print(A+B+C+D)
    # print(HK_skill_score(A,B,C,D))
    # comm_results.loc['HK_score_SF',name] = HK_skill_score(A, B, C, D)
    # comm_results.loc['Accuracy_SF',name] = Accuracy(A,B,C,D)

import matplotlib as plt
import numpy as np
import matplotlib.pyplot as plt

# Figure b: HK score and accuracy of the local (farmer) forecast (LF) and the
# scientific forecast (SF) per community, with the filtered forecast drawn as
# a separate group at x = 4.3.
X = comm_results.columns
farmer_groups = ['Yapalsi', 'Nakpanzoo', 'Combined', 'Best farmers']
HK_FF = comm_results.loc['HK_score', farmer_groups]
Acc_FF = comm_results.loc['Accuracy', farmer_groups]
HK_SF = comm_results.loc['HK_score_SF', farmer_groups]
Acc_SF = comm_results.loc['Accuracy_SF', farmer_groups]
# One x position per farmer group; tied to the list above rather than to the
# number of columns of comm_results.
X_axis = np.arange(len(farmer_groups))
# my_colors = ['#5e98d9', '#1c7cbd', '#4a4a4a', '#8cce6e']
my_colors = ['#D2691E','#653700','#7BC8F6','#000080']

fig, ax = plt.subplots()

ax.bar(X_axis - 0.32, HK_FF, 0.2, label='HK score (LF)', color=my_colors[0])
ax.bar(X_axis - 0.12, Acc_FF, 0.2, label='Accuracy (LF)', color=my_colors[1])
ax.bar(X_axis + 0.12, HK_SF, 0.2, label='HK score (SF)', color=my_colors[2])
ax.bar(X_axis + 0.32, Acc_SF, 0.2, label='Accuracy (SF)', color=my_colors[3])

# Filtered forecast: same four bars, no labels so the legend is not duplicated.
ax.bar(4.3 - 0.32, comm_results.loc['HK_score','Filtered forecast'], 0.2, color=my_colors[0])
ax.bar(4.3 - 0.12, comm_results.loc['Accuracy','Filtered forecast'], 0.2, color=my_colors[1])
ax.bar(4.3 + 0.12, comm_results.loc['HK_score_SF','Filtered forecast'], 0.2, color=my_colors[2])
ax.bar(4.3 + 0.32, comm_results.loc['Accuracy_SF','Filtered forecast'], 0.2, color=my_colors[3])


# Fix: set_xticks() has no 'align' option; depending on the matplotlib
# version the stray keyword was ignored or raised an error.
ax.set_xticks([0, 1, 2, 3, 4.3])
ax.set_xticklabels(X, fontsize=10.5)
# Panel letter.
ax.text(-0.5,0.74,s = 'b', fontsize='x-large', verticalalignment='top', fontfamily='serif',horizontalalignment = 'left' )
# ax.set_xlabel('Filter factor (FF) and subsample factor (SF)', fontsize=12)
ax.set_ylabel('Skill and Accuracy', fontsize=12)
# ax.set_title('Comparison of skill and accuracy between FF and SF', fontsize=14)
ax.legend(fontsize=10, loc='upper center', bbox_to_anchor=(0.48, 1.1), ncol=4)

ax.yaxis.grid(True, linestyle='--')

plt.savefig('C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/bar_plot_FF_SF_final_2022_without_Gbulung.png', bbox_inches='tight')
plt.show()


# Farmer forecasts of the 2020 data set ("Janina" files).  The columns used
# below are 'low', 'medium', 'high' and 'no_rain'; they are added up as
# weights in the contingency table.
Janina_FF = pd.read_csv('C:/Data_waterapps/Data_Janina/Tamale_overall_farmer_forecast_janina.csv',sep = ';')
Janina_FF['datetime'] = pd.to_datetime(Janina_FF['datetime'], format = '%d-%m-%Y')
janina_datetime = Janina_FF['datetime']
Janina_FF.fillna(0, inplace=True)
del Janina_FF['datetime']

# 'rain' = low + medium + high per row (no NaN left after fillna above).
Janina_FF['rain'] = Janina_FF[['low', 'medium', 'high']].sum(axis=1)

# Fix: raw string.  As a normal literal the Windows path contained invalid
# escape sequences (\D, \R, \d, \I), which emit a SyntaxWarning on current
# Python versions.  The path itself is unchanged.
df_raw_data = pd.read_csv(r'C:\Data_waterapps\Data_Janina\Raw data and data analysis\data, scripts for analysis, figures\data_(raw_and_preprocessed)\IF_and_rainfall_observations_Tamale_FSapp_entries.csv',sep=';')
df_raw_data = df_raw_data.replace('tie','x')
df_raw_data = df_raw_data[['Date','How many mm did the rain gauge indicate?','scorching_sun','strong_wind_W_E_direction','dark_clouds','bird_sounds','ants_carry_food_to_hole','frogs_squeaking','red_circle_around_moon','dew']]

# Rain-gauge reading as the observation; rows are matched by index label.
# NOTE(review): assumes both files list the same days in the same order.
Janina_FF['observation'] = df_raw_data.loc[:,'How many mm did the rain gauge indicate?']

# Result table of the 2020 comparison (further columns are added later).
Janina_FF_results = pd.DataFrame(
    index=['HK_score', 'Accuracy'],
    columns=['Farmers', 'Farmers combined', 'Meteoblue'],
)


# Contingency table weighted by the 'rain' / 'no_rain' values of each row.
A=0
B=0
C=0
D=0
for i in Janina_FF.index:
    if Janina_FF.loc[i, 'observation'] > 0:
        A += Janina_FF.loc[i, 'rain']
        B += Janina_FF.loc[i, 'no_rain']
    if Janina_FF.loc[i, 'observation'] == 0:
        C += Janina_FF.loc[i, 'rain']
        D += Janina_FF.loc[i, 'no_rain']
Janina_FF_results.loc['HK_score', 'Farmers'] = HK_skill_score(A, B, C, D)
Janina_FF_results.loc['Accuracy', 'Farmers'] = Accuracy(A, B, C, D)

# Combined farmer forecast of the 2020 season, read back from CSV; the first
# (unnamed) column of the file holds the datetime.
farmer_forecast = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/farmer_forecast_combined.csv')
farmer_forecast['datetime'] = pd.to_datetime(farmer_forecast['Unnamed: 0'])
farmer_forecast = farmer_forecast.drop(columns='Unnamed: 0')
farmer_forecast = farmer_forecast[farmer_forecast['datetime'] < '2020-11-20 00:00:00']

# Contingency table: combined farmer forecast against the observation stored
# in Janina_FF under the same row label.
A = B = C = D = 0
for row_label, predicted in farmer_forecast['Farmer_forecast'].items():
    observed = Janina_FF.loc[row_label, 'observation']
    if observed > 0:
        if predicted > 0:
            A += 1
        if predicted == 0:
            B += 1
    if observed == 0:
        if predicted > 0:
            C += 1
        if predicted == 0:
            D += 1
Janina_FF_results.loc['HK_score', 'Farmers combined'] = HK_skill_score(A, B, C, D)
Janina_FF_results.loc['Accuracy', 'Farmers combined'] = Accuracy(A, B, C, D)

# Meteoblue rain amount and rain probability, restricted to the 2020 season.
MB_prob_prec = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/MB_prob_prec_total.csv')
MB_prob_prec['datetime'] = pd.to_datetime(MB_prob_prec['datetime'])
MB_prob_prec = MB_prob_prec.rename(columns={'value_x': 'MB_rain_amount', 'value_y': 'MB_rain_prob'})
MB_prob_prec = MB_prob_prec.drop(columns=['cat_x', 'cat_y'])
# NOTE(review): the farmer table above is cut at 2020-11-20, this one at
# 2020-11-01 -- confirm that the difference is intended.
MB_prob_prec = MB_prob_prec.loc[MB_prob_prec['datetime'] < '2020-11-01 00:00:00'].copy()

# Filtered forecast:
#   probability known   -> keep the amount when probability >= 60, else 0
#   probability missing -> keep the amount unless it is below 1, then 0
# Fix: a probability of exactly 60 used to match neither `< 60` nor `> 60`,
# left the filtered value NaN and so dropped that day from the filtered score.
# NOTE(review): 60 is now treated as passing the filter -- confirm.
rain_prob = MB_prob_prec['MB_rain_prob']
rain_amount = MB_prob_prec['MB_rain_amount']
MB_prob_prec['MB_rain_amount_filt'] = np.where(
    rain_prob.isna(),
    rain_amount.mask(rain_amount < 1, 0),
    rain_amount.mask(rain_prob < 60, 0),
)
del MB_prob_prec['MB_rain_prob']

# Score the raw and the filtered Meteoblue forecast against the observation
# stored in Janina_FF under the same row label.
for forecast_column, result_column in (
    ('MB_rain_amount', 'Meteoblue'),
    ('MB_rain_amount_filt', 'Meteoblue filtered'),
):
    A = 0
    B = 0
    C = 0
    D = 0
    for i in MB_prob_prec.index:
        observed = Janina_FF.loc[i, 'observation']
        predicted = MB_prob_prec.loc[i, forecast_column]
        if observed > 0:
            if predicted > 0:
                A += 1
            if predicted == 0:
                B += 1
        if observed == 0:
            if predicted > 0:
                C += 1
            if predicted == 0:
                D += 1
    Janina_FF_results.loc['HK_score', result_column] = HK_skill_score(A, B, C, D)
    Janina_FF_results.loc['Accuracy', result_column] = Accuracy(A, B, C, D)

# Figure a: HK score and accuracy of the combined farmer forecast, the
# Meteoblue forecast and the filtered Meteoblue forecast for 2020.
comparison_columns = ['Farmers combined', 'Meteoblue', 'Meteoblue filtered']
X = Janina_FF_results.loc[:, comparison_columns].columns
HK = Janina_FF_results.at['HK_score', 'Farmers combined']
Acc = Janina_FF_results.at['Accuracy', 'Farmers combined']
HK_SF = Janina_FF_results.at['HK_score', 'Meteoblue']
Acc_SF = Janina_FF_results.at['Accuracy', 'Meteoblue']
HK_SF_filt = Janina_FF_results.at['HK_score', 'Meteoblue filtered']
Acc_SF_filt = Janina_FF_results.at['Accuracy', 'Meteoblue filtered']

X_axis = np.arange(len(X))
X_axis_ticks = np.arange(len(X))

# my_colors = ['#5e98d9', '#1c7cbd', '#4a4a4a', '#8cce6e']
my_colors = ['#D2691E','#653700','#7BC8F6','#000080']

fig, ax = plt.subplots()

# (x position, height, colour, legend label); the filtered-forecast bars reuse
# the SF colours and get no legend entry of their own.
bar_specs = [
    (0 - 0.1, HK, my_colors[0], 'HK score (LF)'),
    (0 + 0.1, Acc, my_colors[1], 'Accuracy (LF)'),
    (1 - 0.1, HK_SF, my_colors[2], 'HK score (SF)'),
    (1 + 0.1, Acc_SF, my_colors[3], 'Accuracy(SF)'),
    (2 - 0.1, HK_SF_filt, my_colors[2], None),
    (2 + 0.1, Acc_SF_filt, my_colors[3], None),
]
for position, height, colour, legend_label in bar_specs:
    label_kwargs = {} if legend_label is None else {'label': legend_label}
    ax.bar(position, height, 0.2, color=colour, **label_kwargs)


ax.set_xticks(X_axis_ticks)
ax.set_xticklabels(X)
# Panel letter.
ax.text(-0.25, 0.74, s='a', fontsize='x-large', verticalalignment='top', fontfamily='serif', horizontalalignment='left')
# ax.set_xlabel('Filter factor (FF) and subsample factor (SF)', fontsize=12)
ax.set_ylabel('Skill and Accuracy', fontsize=12)
# ax.set_title('Comparison of skill and accuracy between FF and SF', fontsize=14)
ax.legend(fontsize=10, loc='upper center', bbox_to_anchor=(0.48, 1.1), ncol=4)

ax.yaxis.grid(True, linestyle='--')

plt.savefig('C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/bar_plot_FF_SF_final_2020_alt.png', bbox_inches='tight')
plt.show()


#-----------------------------------------------------------------------
# Make figure comparing the skill of the RF-RF to the other forecasts
# (last figure of thesis).
#
# NOTE: HK_mean, Acc_mean, HK_ind, Acc_ind and predicitons_with_farmer_obs are
# not defined in this file; they have to exist in the session already.

final_results = pd.DataFrame(index=['Accuracy', 'HK_score'])
final_results.loc['HK_score', 'HF_forecast'] = HK_mean.loc[0, 'RF-RF']
final_results.loc['Accuracy', 'HF_forecast'] = Acc_mean.loc[0, 'RF-RF']
final_results.loc['HK_score', 'Indicator forecast'] = HK_ind.loc['Average HK score', 'RF']
final_results.loc['Accuracy', 'Indicator forecast'] = Acc_ind.loc['Average Accuracy', 'RF']

# Meteoblue rain amount / probability, reloaded unfiltered.
MB_prob_prec = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/MB_prob_prec_total.csv')
MB_prob_prec['datetime'] = pd.to_datetime(MB_prob_prec['datetime'])
MB_prob_prec = MB_prob_prec.rename(columns={'value_x': 'MB_rain_amount', 'value_y': 'MB_rain_prob'})
MB_prob_prec = MB_prob_prec.drop(columns=['cat_x', 'cat_y'])

# Predictions plus farmer observations, extended with the Meteoblue columns;
# everything that is not scored is removed again.
predicitons_with_farmer_obs_skill = predicitons_with_farmer_obs.copy().merge(MB_prob_prec, on='datetime')
predicitons_with_farmer_obs_skill = predicitons_with_farmer_obs_skill.drop(
    columns=['datetime', 'probability_no_rain', 'probability_rain', 'MB_rain_prob']
)


# Score every remaining column against 'Farmer_obs'.
for j in predicitons_with_farmer_obs_skill:
    A = B = C = D = 0
    for observed, predicted in zip(
        predicitons_with_farmer_obs_skill['Farmer_obs'],
        predicitons_with_farmer_obs_skill[j],
    ):
        if observed > 0:
            if predicted > 0:
                A += 1
            if predicted == 0:
                B += 1
        if observed == 0:
            if predicted > 0:
                C += 1
            if predicted == 0:
                D += 1
    final_results.loc['HK_score', j] = HK_skill_score(A, B, C, D)
    final_results.loc['Accuracy', j] = Accuracy(A, B, C, D)
# 'Farmer_obs' was scored against itself in the loop; drop that column.
del final_results['Farmer_obs']

# Final comparison figure: HK score and accuracy of the hybrid forecast (HF),
# the indicator forecast, the local forecast (LF) and the scientific forecast
# (SF), with standard-deviation error bars on the first two.
#
# NOTE: the std_dev_rndm_test_* tables are not defined in this file; they have
# to exist in the session already.
my_colors = ['#D2691E','#653700','#7BC8F6','#000080','#BBF90F','#008000']
X = ['Hybrid forecast','Indicator forecast','Local forecast','Scientific Forecast']

fig, ax = plt.subplots()

bar = ax.bar(0 - 0.1,final_results.loc['HK_score','HF_forecast'],0.2, label='HK score (HF)', color=my_colors[4])
bar1 = ax.bar(0 + 0.1,final_results.loc['Accuracy','HF_forecast'],0.2, label='Accuracy (HF)', color=my_colors[5])

# Indicator forecast: drawn in the LF colours, without legend entries.
ax.bar(1 - 0.1, final_results.loc['HK_score','Indicator forecast'], 0.2, color=my_colors[0])
ax.bar(1 + 0.1, final_results.loc['Accuracy','Indicator forecast'], 0.2, color=my_colors[1])


bar2 = ax.bar(2 - 0.1, final_results.loc['HK_score','Farmer_forecast'], 0.2, label='HK score (LF)', color=my_colors[0])
bar3 = ax.bar(2 + 0.1, final_results.loc['Accuracy','Farmer_forecast'], 0.2, label='Accuracy (LF)', color=my_colors[1])
bar4 = ax.bar(3 - 0.1, final_results.loc['HK_score','MB_rain_amount'], 0.2, label='HK score (SF)', color=my_colors[2])
bar5 = ax.bar(3 + 0.1, final_results.loc['Accuracy','MB_rain_amount'], 0.2, label='Accuracy(SF)', color=my_colors[3])



# Error bars of the hybrid forecast.
# NOTE(review): the bar heights come from the 'RF-RF' column, these standard
# deviations from 'VC-RF' -- confirm that this is the intended column.
y_error = std_dev_rndm_test_HK.loc[0,'VC-RF']
error = ax.errorbar(0-0.1, final_results.loc['HK_score','HF_forecast'], color='r',yerr=y_error,fmt="o",label= 'Standard dev.')

y_error1 = std_dev_rndm_test_acc.loc[0,'VC-RF']
error1 = ax.errorbar(0+0.1, final_results.loc['Accuracy','HF_forecast'], color='r',yerr=y_error1,fmt="o")

# Error bars of the indicator forecast.
# Fix: both were drawn with y_error1 (the accuracy deviation of the hybrid
# forecast) instead of their own standard deviations.
y_error2 = std_dev_rndm_test_HK_ind.loc['HK standard deviation','RF']
error2 = ax.errorbar(1-0.1, final_results.loc['HK_score','Indicator forecast'], color='r',yerr=y_error2,fmt="o")

y_error3 = std_dev_rndm_test_acc_ind.loc['Acc standard deviation','RF']
error3 = ax.errorbar(1+0.1, final_results.loc['Accuracy','Indicator forecast'], color='r',yerr=y_error3,fmt="o")




ax.set_xticks([0,1,2,3])
ax.set_xticklabels(X)
# ax.text(-0.25,0.74,s = 'a', fontsize='x-large', verticalalignment='top', fontfamily='serif',horizontalalignment = 'left' )
# ax.set_xlabel('Filter factor (FF) and subsample factor (SF)', fontsize=12)
ax.set_ylabel('Skill and Accuracy', fontsize=12)
# ax.set_title('Comparison of skill and accuracy between FF and SF', fontsize=14)
# Explicit handles: only the labelled bars and one error-bar entry are listed.
handles = [bar, bar1,bar2,bar3,bar4,bar5,error]
ax.legend(handles =handles, fontsize=10, loc='upper center', bbox_to_anchor=(0.48, 1.15), ncol=4)

# labels = ['HK score (HF)','Accuracy (HF)','Standard deviation']

ax.yaxis.grid(True, linestyle='--')

plt.savefig('C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/final_comparison.png',bbox_inches= 'tight')
plt.show()

